package com.langll.ai.tika;

import com.langll.ai.AIChatApplication;
import lombok.extern.slf4j.Slf4j;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.test.context.junit4.SpringRunner;

import java.util.List;

/**
 * Tests word segmentation for vectorization.
 *
 * <p>Runs under {@link SpringRunner} with the application context of
 * {@link AIChatApplication} loaded for the test.
 */
@Slf4j
@SpringBootTest(classes = AIChatApplication.class)
@RunWith(SpringRunner.class)
public class TestTika {

    /** Sample mixed Chinese/Latin text handed to the splitter under test. */
    private static final String SAMPLE_TEXT =
            "2021年HanLPv2.1为生产环境带来次世代最先进的多语种NLP技术。晓美焰来到北京立方庭参观自然语义科技公司。";

    /**
     * Smoke test: calls {@code TikaUtil.splitParagraphsHanLP} with the sample text and
     * verifies only that the returned list is not {@code null}. The content and size of
     * the result are not checked.
     */
    @Test
    public void testTika() {
        final List<String> segments = TikaUtil.splitParagraphsHanLP(SAMPLE_TEXT);
        Assert.assertNotNull(segments);
    }

}
